#!/bin/bash

#SBATCH --partition=H100
#SBATCH --gres=gpu:2
#SBATCH --nodes=1
#SBATCH --nodelist=h100-1

# Run from the directory the job was submitted from: the model path passed to
# vLLM below is relative. Abort instead of continuing if the variable is
# unset/empty (e.g. the script was started without sbatch) or the directory
# cannot be entered, so the server never starts from an unintended location.
cd -- "${SLURM_SUBMIT_DIR:?SLURM_SUBMIT_DIR is not set; submit this script with sbatch}" || exit 1

# Arguments for `vllm serve`, kept in an array so each option sits on its own
# line and no value is subject to word-splitting or globbing.
vllm_args=(
  model/Qwen3-32B                 # model path, relative to the submit directory
  --max-model-len 15000
  --enable-lora
  # LoRA adapter registered under the name "xwlw".
  --lora-modules xwlw=/public/home/lab2/project/pub_ft_code/adapter/qlora/Qwen3-32B/xxwlw32/checkpoint-1114
  --max-lora-rank 64
  --port 8004
  --trust-remote-code
  # Same value as the GPU count requested via --gres=gpu:2 in the job header.
  --tensor-parallel-size 2
)

vllm serve "${vllm_args[@]}"